Tech Arsenal 1

home *** CD-ROM | disk | FTP | other *** search

/ Tech Arsenal 1 / Tech Arsenal (Arsenal Computer).ISO / tek-02 / kruse_11.zip / 11SOL.EXT < prev next >

Wrap

Text File | 1990-12-01 | 18KB | 582 lines

{Section 11.3 Phase 1: Splitting the Text into Words} {Exercise E6} const nfiles = 8; {number of temporary files for unprocessed words} type filecode = 1..nfiles; var RefFile: array[filecode] of fileref; {local files used for auxiliary storage of words from phase 1 to phase 2: Normally, a separate file exist for each initial letter; this version uses nfiles files due to operating system constraints.} procedure SplitWords; var outcount: array[filecode] of integer; {counter for word files} code: filecode; {Into which file does word go?} {The remainder of the local declarations are unchanged.} begin {procedure SplitWords} Initialize; {sets up files, hash table, constants} GetWord(w); {Obtain a single word from InText.} while not endinput do begin x := HashAddress(w); if w <> hash[x] then begin {Not in hash table; put into RefFile.} code := FindFile( w[1] ); outcount[code] := outcount[code] + 1; with RefFile[code]^ do {Update the storage file.} begin wd := w; pg := pagecount end; Put(RefFile[code]) end; GetWord(w) end; Conclude {writes word counts to output} end; {procedure SplitWords} function FindFile( ch: letter): filecode; {Uses binary decision tree to select one of nfiles = 8 files depending on the letter ch. } begin {function FindFile} if ch < 'M' then if ch < 'E' then if ch < 'C' then FindFile := 1 else FindFile := 2 else if ch < 'H' then FindFile := 3 else FindFile := 4 else if ch < 'S' then if ch < 'P' then FindFile := 5 else FindFile := 6 else if ch < 'T' then FindFile := 7 else FindFile := 8 end; {function FindFile} for ch := A to Z do begin rewrite( RefFile[ch] ); outcount[ch] := 0 end; for i := 1 to nfiles do begin rewrite( RefFile[i] ); outcount[i] := 0 end; procedure Conclude; {Writes out counts of various word lists. For some systems, it is necessary to close files, which should be done here.} var i: integer; {loop index} begin {procedure Conclude} writeln('The total number of words read in is ', wordcount:7); writeln; writeln('The number of words to process further in the next stage,'); writeln('in each temporary file, is below.'); writeln('a - b c - d e - g h - l m - o p - r s t - z'); for i := 1 to nfiles do write(outcount[i]:7); writeln; writeln end; {procedure Conclude} {Section 11.4 Phase 2: Classifying the Words} {Exercise E3} procedure ClassifyWords; {The references stored in the temporary files are placed into a list, the words from the file InIndex are compared with the words in the list as they are merged into the file NewIndex.} type wordtype = (hash, count, index); {ways to process a word} pointref = ^reflist; reflist = record {list of page references} pg: integer; next: pointref end; pointer = ^node; node = record {node of list storing words} wd: word; kind: wordtype; ct: integer; ref: pointref; next: pointer end; {Cannot use varying types as wordtype is not known upon first reading.} list = record head: pointer end; var code: filecode; {index used to loop through temporary files} NewList: list; procedure Merge(p, q: pointer; var r: pointer); {Merges two sorted lists into one, that will begin at r; requires that both lists be nonempty. This version is modified slightly from the version listed in the text due to a difference in the data structures used.} var s: pointer; {always points to last node of sorted list} begin {procedure Merge} if (p = nil) or (q = nil) then writeln('Merge called with empty list(s).'); if p^.wd <= q^.wd then {First find the head, r, of the merged list.} begin {Note the change from .info.key to .wd. } r := p; p := p^.next end else begin r := q; q := q^.next end; s := r; {s always points to the last entry of the merged list.} while (p <> nil) and (q <> nil) do if p^.wd <= q^.wd then {Note the change from .info.key to .wd. } begin s^.next := p; {Attach the node with the smaller key to the sorted list.} s := p; p := p^.next {Advance to the next unmerged node.} end else begin s^.next := q; s := q; q := q^.next end; if p = nil then {After one list is exhausted, attach the remainder of the other one.} s^.next := q else s^.next := p end; {procedure Merge} {Include the procedures MergeSort and Divide from Chapter 7 here.} procedure MainMergeSort(var L: list); { Main procedure to invoke recursive procedure MergeSort, as listed in the text. } begin MergeSort(L.head) end; procedure InitializeList(var L: list); begin L.head := nil end; procedure Insert(x: reference; var L: list); { Inserts the reference into the hash table of references. } var done: Boolean; p: pointer; q: pointref; begin {procedure Insert} done := false; p := L.head; while (p <> nil) and (not done) do begin if p^.wd = x.wd then {The word is already in the list, update its node.} begin p^.ct := p^.ct + 1; new(q); q^.pg := x.pg; q^.next := p^.ref; p^.ref := q; done := true end else p := p^.next end; if not done then begin {Insert a new entry if the word is not already in the table.} p := nil; new(p); p^.wd := x.wd; p^.ct := 1; {Initialize the count and the page references.} new(q); q^.pg := x.pg; q^.next := nil; p^.ref := q; p^.next := L.head; L.head := p end end; {procedure Insert} procedure Append(p: pointer; var L: list); { Append the nodes pointed to by p to the end of the list L. } var q: pointer; begin {procedure Append} q := L.head; if q = nil then {The list is empty, make p the beginning of the list.} L.head := p else begin while q^.next <> nil do {Find the end of the list.} q := q^.next; q^.next := p end end; {procedure Append} procedure Place(var F: fileref; var L: list); { Places the words in file F into the list of words. } var x: reference; temp: pointer; begin {procedure Place} temp := L.head; {Save the words that have already been processed.} L.head := nil; reset(F); while not eof(F) do {Insert all the words into the list.} begin x := F^; get(F); Insert(x, L) end; MainMergeSort(L); {Sort the list and append the other work to the list.} Append(temp, L) end; {procedure Place} procedure RemoveFirst(var p: pointer; var L: list); { Removes the first node from the list L. } begin {procedure RemoveFirst} p := L.head; if not Empty(L) then {standard list operation} begin L.head := L.head^.next; p^.next := nil end end; {procedure RemoveFirst} procedure ReadReference(var r: pointer; var F: text); { Reads reference from the file F. } var k: char; begin {procedure ReadReference} if eof(F) then r := nil else begin ReadWord(F, r^.wd); readln(F, k); case k of 'F', 'f': r^.kind := hash; 'C', 'c': begin r^.kind := count; r^.ct := 0 end; 'I', 'i': begin r^.kind := index; r^.ref := nil end end end end; {procedure ReadReference} procedure WriteReference(p: pointer; var NewIndex, NewHashFile: text); {writes a word to the appropriate file with the appropriate associated information} var q: pointref; begin {procedure WriteReference} with p^ do case kind of hash: begin {Write the word to the hash file.} WriteWord(NewHashFile, wd); writeln(NewHashFile) end; count:begin {Write the word and its frequency to the new index file.} WriteWord(NewIndex, wd); write(NewIndex, 'c'); writeln(NewIndex, ct:5) end; index:begin {Write the word and its page numbers to the new index file.} WriteWord(NewIndex, wd); write(NewIndex, 'i'); q := ref; while q <> nil do begin write(NewIndex, q^.pg:5); q := q^.next end; writeln(NewIndex) end end end; {procedure WriteReference} procedure GetWordType(p: pointer); { Request the user to specify the category of the given word. } var response: char; begin {procedure GetWordType} with p^ do begin repeat WriteWord(output, wd); write(' is (F, C, I)?'); readln(response) until response in ['F', 'f', 'C', 'c', 'I', 'i']; case response of 'F', 'f': kind := hash; 'C', 'c': kind := count; 'I', 'i': kind := index end end end; {procedure GetWordType} procedure Delete(var p: pointer); { Delete the word p^ as well as all of the page references associated with it. } var q, r: pointref; begin {procedure Delete} if p^.kind = index then begin q := p^.ref; while q <> nil do begin {Dispose the list of page references associated with the word.} r := q^.next; dispose(q); p^.ref := r; q := r end end; dispose(p) {dispose the node itself} end; {procedure Delete} procedure CompareAndMerge(var L: list; var InIndex, NewIndex, NewHashFile: text); { Compare the list L with InIndex, merge if was found. } var p, r: pointer; begin {procedure CompareAndMerge} RemoveFirst(p, L); new(r); ReadReference(r, InIndex); while p <> nil do if r = nil then begin GetWordType(p); WriteReference(p, NewIndex, NewHashFile); Delete(p); {Remove reference list and node from memory.} RemoveFirst(p, L) end if p^.wd < r^.wd then begin GetWordType(p); WriteReference(p, NewIndex, NewHashFile); Delete(p); {Remove reference list and node from memory.} RemoveFirst(p, L) end else if p^.wd > r^.wd then {Do not write a word that is not used to NewIndex.} ReadReference(r, InIndex) else begin {p^.wd = r^.wd} p^.kind := r^.kind; WriteReference(p, NewIndex, NewHashFile); Delete(p); {Remove reference list and node from memory.} RemoveFirst(p, L); ReadReference(r, InIndex) end end; {procedure CompareAndMerge} begin {procedure ClassifyWords} reset(InIndex); rewrite(NewIndex); rewrite(NewHashFile); InitializeList(NewList); for code := nfiles downto 1 do {Place the words from each file into the list.} Place(RefFile[code], NewList); if not Empty(NewList) then {standard list operation} CompareAndMerge(NewList, InIndex, NewIndex, NewHashFile) end; {procedure ClassifyWords} {Exercise E4} procedure ClassifyWords; {The references stored in the temporary files are placed in a new hash table, the words from the file InIndex are compared with the words in the new table as they are merged into the file NewIndex.} const RefTableSize = 3023; {Size of the hash table to temporarily store words.} RefTableMax = 3022; type wordtype = (hash, count, index); {ways to process a word} pointref = ^reflist; reflist = record {list of page references} pg: integer; next: pointref end; pointer = ^node; node = record {Node of list storing words.} wd: word; kind: wordtype; ct: integer; ref: pointref; next: pointer end; {Cannot use varying types as wordtype is not known upon first reading.} list = record head: pointer end; RefHashTable = array[0..RefTableMax] of list; var code: filecode; {index used to loop through temporary files} RefTable: RefHashTable; {stores all references in memory} NewList: list; procedure InitializeTable(var RefTable: RefHashTable); var i: integer; begin {procedure InitializeTable} for i := 0 to RefTableMax do RefTable[i].head := nil end; {procedure InitializeTable} function RefTableAddress(x: reference): integer; { Returns hashed address of reference. } var i, h: integer; begin {function Hash} h := 0; with x do for i := 1 to maxwd do h := h + ord(wd[i]); RefTableAddress := h mod RefTableSize end; {function Hash} procedure Insert(x: reference; pos: integer; var RefTable: RefHashTable); {inserts the reference into the hash table of references} var done: Boolean; p: pointer; q: pointref; begin {procedure Insert} done := false; p := RefTable[pos].head; while (p <> nil) and (not done) do begin {Search for the word, update the reference if it is found.} if p^.wd = x.wd then begin p^.ct := p^.ct + 1; {Update count and page reference.} new(q); q^.pg := x.pg; q^.next := p^.ref; p^.ref := q; done := true end else p := p^.next end; if not done then begin {Insert a new entry if the word is not in the table.} p := nil; new(p); p^.wd := x.wd; p^.ct := 1; {Initialize the count and the page references.} new(q); q^.pg := x.pg; q^.next := nil; p^.ref := q; p^.next := RefTable[pos].head; RefTable[pos].head := p end end; {procedure Insert} procedure Place(var F: fileref; var RefTable: RefHashTable); {places the words in file F into the reference table} var x: reference; begin {procedure Place} reset(F); while not eof(F) do begin x := F^; get(F); Insert(x, RefTableAddress(x), RefTable) end end; {procedure Place} procedure LinkEntries(var RefTable: RefHashTable; var NewList: list); { The references in the hashed table are combined into the list NewList. } var i: integer; p: pointer; begin {procedure LinkEntries} i := 0; while (i < RefTableMax) and Empty(RefTable[i]) do {Find the first entry.} i := i + 1; if i <= RefTableMax then begin NewList.head := RefTable[i].head; {Initialize the list to point to the first entry.} p := RefTable[i].head; if p <> nil then {Find the end of the first chain.} while p^.next <> nil do p := p^.next; while (i < RefTableMax) do {Link remaining entries into the list.} begin i := i + 1; if not Empty(RefTable[i]) then {standard list procedure} begin p^.next := RefTable[i].head; while p^.next <> nil do {Move p to the end of the chain.} p := p^.next end end end else NewList.head := nil end; {procedure LinkEntries} {See the previous exercise for the following procedures.} procedure RemoveFirst(var p: pointer; var L: list); procedure ReadReference(var r: pointer; var F: text); procedure WriteReference(p: pointer; var NewIndex, NewHashFile: text); procedure GetWordType(p: pointer); procedure Delete(var p: pointer); procedure CompareAndMerge(var L: list; var InIndex, NewIndex, NewHashFile: text); procedure Merge(p, q: pointer; var r: pointer); {Include the procedures MergeSort and Divide from Chapter 7 here.} procedure MainMergeSort(var L: list); begin {procedure ClassifyWords} reset(InIndex); rewrite(NewIndex); rewrite(NewHashFile); InitializeTable(RefTable); for code := 1 to nfiles do {Place all the words into the reference hash table.} Place(RefFile[code], RefTable); LinkEntries(RefTable, NewList); {Link the entries of the table into a list.} MainMergeSort(NewList); if not Empty(NewList) then CompareAndMerge(NewList, InIndex, NewIndex, NewHashFile) end; {procedure ClassifyWords}